# Preview the first five rows of the message table.
df.head(n=5)
| | sender_name | timestamp_ms | content | type | photos | gifs | audio_files | year | month | hour |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Mélanie Amazo'Night Bergeot | 2020-03-29 07:55:58.952 | :p | Generic | NaN | NaN | NaN | 2020 | 3 | 07:55 |
| 1 | Nicolas Cailleux | 2020-03-29 07:43:47.446 | t'as de quoi faire un peu quand meme :p | Generic | NaN | NaN | NaN | 2020 | 3 | 07:43 |
| 2 | Nicolas Cailleux | 2020-03-29 07:43:41.209 | exactement ça va | Generic | NaN | NaN | NaN | 2020 | 3 | 07:43 |
| 3 | Nicolas Cailleux | 2020-03-29 07:43:38.139 | enfin 59340 | Generic | NaN | NaN | NaN | 2020 | 3 | 07:43 |
| 4 | Nicolas Cailleux | 2020-03-29 07:42:49.732 | 60k msg | Generic | NaN | NaN | NaN | 2020 | 3 | 07:42 |
# Print the day count followed by the two boundary dates; print() applies
# str() to every argument and sep='' joins them without extra spaces.
print(nb_day_first_last, ' day between ', first_day, ' and ', last_day, sep='')
1263 day between 2016-10-13 and 2020-03-29
# Count the "active" days: the distinct calendar dates found in the data.
# NOTE: test_activeday is bound to the very same object as df (no copy is
# made), so the 'date' column added below is also visible through df.
test_activeday = df
test_activeday['date'] = pd.to_datetime(test_activeday['timestamp_ms']).dt.date
nb_active_days = test_activeday['date'].nunique()

# Share of active days over the whole period; int() truncates it to a
# whole number of percent.
pc_active_days = int(100 * nb_active_days / nb_day_first_last)
print(
    "%d (%d percent) of those were ‘active’ days (i.e. messages were sent)."
    % (nb_active_days, pc_active_days)
)
# TODO: work out how to render this value as a proper percentage
1084 (85 percent) of those were ‘active’ days (i.e. messages were sent).
# Report the total message count and the difference between the two senders.
print(
    "%d messages. I sent %d more messages than my boyfriend"
    % (nb_msg_send, nb_diff_msg_send)
)
59389 messages. I sent 1277 more messages than my boyfriend
# Report the vocabulary size overall and per participant.  The two adjacent
# literals are joined at compile time, before the % formatting is applied.
print(
    "%d unique words were used (many of which are not real words). "
    "I used %d unique words where my boyfriend used %d."
    % (nb_unique_word, nb_unique_word_mel, nb_unique_word_nico)
)
6927 unique words were used (many of which are not real words). I used 4244 unique words where my boyfriend used 2683.
# Static description of the two participants in the conversation.
print(
    "2 participants - my boyfriends and I - "
    "living in FR most of the time and in our early twenties."
)
2 participants - my boyfriends and I - living in FR most of the time and in our early twenties.
import datetime
# Daily message volume over time, with a smoothed trend and a marker line.
fig, ax = plt.subplots(figsize=(18, 10))

# Raw per-day message counts (unlabelled, so absent from the legend).
ax.plot(df_msg_day.index.values, df_msg_day['nb_msg_per_day'], color='green')

# Axis labels and title.
ax.set_xlabel("Date")
ax.set_ylabel("nb_msg_send")
ax.set_title("Daily Total messages send to each other\n")

# Trend line: 150-day exponential moving average.
# (A 150-day simple moving average and a 50-day EMA were tried as
# alternatives and left out.)
exp3 = df_msg_day['nb_msg_per_day'].ewm(span=150, adjust=False).mean()
ax.plot(df_msg_day.index.values, exp3, label='Msg 150 Day EMA', color='blue')

# Full-height vertical marker on a fixed date.
# NOTE(review): the string is parsed month-first ('%m-%d-%Y'), i.e.
# 8 February 2017 - confirm that this, not 2 August 2017, is intended.
date_str = '02-08-2017'
date_object = datetime.datetime.strptime(date_str, '%m-%d-%Y').date()
ax.axvline(date_object, ymin=0, ymax=1, label='Begining of our relation', color='pink')

ax.legend()
plt.show()
import matplotlib.pyplot as plt
# Horizontal bar chart: average number of messages for each day of the week.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])  # axes span the whole figure area

avg_msg_day = df_msg_dayofweek['average_msg_day']
Day = df_msg_dayofweek['day']
y_pos = np.arange(len(Day))

# One bar per row, each with its own colour.
ax.barh(
    y_pos,
    avg_msg_day,
    color=['black', 'navy', 'teal', 'darkgreen', 'green', 'yellowgreen', 'yellow'],
)

# Label every bar with its day name on the y-axis.
ax.set_yticks(y_pos)
ax.set_yticklabels(Day)

ax.set_xlabel('Average msg send per day')
ax.set_title('Average number of messages per day of the week')

# Render the chart.
plt.show()
import numpy as numpy
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, DateFormatter
import datetime as dt
# Average messages per minute over the course of a day, plus a smoothed trend.
x = pd.to_datetime(df_msg_hour.index.values)
y = df_msg_hour['average msg per minutes']
x = date2num(x)  # Convert datetimes to matplotlib's numeric date format.

fig, ax = plt.subplots(figsize=(18, 10))

# Axes.plot_date is deprecated; its documented replacement is to declare the
# x-axis as a date axis and then call plot().  The former call also passed a
# blue format string ('b-') together with color='green', a redundant colour
# definition; only the solid line style and the effective green are kept.
ax.xaxis_date()
ax.plot(x, y, '-', color='green')

# Show tick labels as HH:MM.
ax.xaxis.set_major_formatter(DateFormatter('%H:%M'))

# Set title and labels for axes
ax.set(xlabel="Hour",
       ylabel="average msg per minutes",
       title="Hour of messages send to each other\n")

# Trend line: exponential moving average with a span of 60 samples.
exp3 = y.ewm(span=60, adjust=False).mean()
ax.plot(x, exp3, label='Msg hour EMA', color='blue')

# Slant the tick labels so they do not overlap.
fig.autofmt_xdate()
ax.legend()
plt.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
# Side-by-side horizontal bar charts of the most used word-phrases:
# Mel in the left subplot, Nico in the right one.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "xy"}, {"type": "xy"}]],
    subplot_titles=("Mel", "Nico"),
)

bar_nico = go.Bar(
    x=top_10_exp_nico_final['count'],
    y=top_10_exp_nico_final['vals'],
    text=top_10_exp_nico_final['vals'],
    textposition='auto',
    orientation='h',
    hoverinfo='x',
    marker=dict(color=px.colors.qualitative.Safe[0]),
)
bar_mel = go.Bar(
    x=top_10_exp_mel_final['count'],
    y=top_10_exp_mel_final['vals'],
    text=top_10_exp_mel_final['vals'],
    textposition='auto',
    orientation='h',
    hoverinfo='x',
    marker=dict(color=px.colors.sequential.Magenta[1]),
)

fig.add_trace(bar_mel, row=1, col=1)
fig.add_trace(bar_nico, row=1, col=2)

# Left subplot: reversed x-axis, so the bars grow towards the left.
# (Setting fig['layout']['xaxis']['autorange'] = "reversed" also works, but
# only reaches the first subplot.)
fig.update_xaxes(title_text=" ", autorange='reversed', row=1, col=1)
# Right subplot: fixed x range.
# NOTE(review): the upper bound 180 is hard-coded - confirm it still fits
# the data whenever the input changes.
fig.update_xaxes(title_text=" ", range=[0, 180], row=1, col=2)

# The phrases are drawn as bar text, so both y-axes (the only two in the
# figure) are hidden.
fig.update_yaxes(visible=False)

# Fixed 600x600 canvas, no legend, centred title.
fig.update_layout(
    height=600,
    width=600,
    autosize=False,
    showlegend=False,
    title=dict(text="Word-phrase most used", x=0.5),
)
fig.show()
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Side-by-side horizontal bar charts of the most used single words:
# Nico in the left subplot, Mel in the right one.
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{"type": "xy"}, {"type": "xy"}]],
    subplot_titles=("Nico", "Mel"),
)

bar_nico = go.Bar(
    x=wordcount_Nico_10['count'],
    y=wordcount_Nico_10['vals'],
    text=wordcount_Nico_10['vals'],
    textposition='auto',
    orientation='h',
    hoverinfo='x',
    marker=dict(color=px.colors.qualitative.Safe[0]),
)
bar_mel = go.Bar(
    x=wordcount_Mel_10['count'],
    y=wordcount_Mel_10['vals'],
    text=wordcount_Mel_10['vals'],
    textposition='auto',
    orientation='h',
    hoverinfo='x',
    marker=dict(color=px.colors.sequential.Magenta[1]),
)

fig.add_trace(bar_nico, row=1, col=1)
fig.add_trace(bar_mel, row=1, col=2)

# Settings shared by both subplots: blank x title, no grid lines, hidden
# and reversed y-axes.
fig.update_xaxes(title_text=" ", showgrid=False)
fig.update_yaxes(visible=False, autorange='reversed', showgrid=False)

# Left subplot: reversed x-axis, so the bars grow towards the left.
# (Assigning fig['layout']['xaxis']['autorange'] = "reversed" or
# fig['layout']['xaxis']['range'] = [30, 0] also works, but only reaches
# the first subplot.)
fig.update_xaxes(autorange='reversed', row=1, col=1)
# Right subplot: fixed x range.
# NOTE(review): the upper bound 3300 is hard-coded - confirm it still fits
# the data whenever the input changes.
fig.update_xaxes(range=[0, 3300], row=1, col=2)

# Fixed 600x600 canvas, no legend, centred title.
fig.update_layout(
    height=600,
    width=600,
    autosize=False,
    showlegend=False,
    title=dict(text="Word most used", x=0.5),
)
fig.show()
import plotly.graph_objects as go
# Grouped horizontal bars comparing the nickname counts of both participants.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        name='Mel',
        x=wordcount_Mel_cheri['count'],
        y=wordcount_Mel_cheri['vals'],
        orientation='h',
        marker_color=px.colors.sequential.Magenta[1],
    )
)
fig.add_trace(
    go.Bar(
        name='Nico',
        x=wordcount_Nico_chat['count'],
        y=wordcount_Nico_chat['vals'],
        orientation='h',
        marker_color=px.colors.qualitative.Safe[0],
    )
)

# Bars drawn in groups (barmode='group') on a fixed 600x600 canvas with a
# centred title.
fig.update_layout(
    barmode='group',
    height=600,
    width=600,
    autosize=False,
    title=dict(text="Nickname lover's winner is ", x=0.5),
)
fig.show()
import matplotlib.animation as animation
from IPython.display import HTML
# Animated chart: draw_barchart_test is called once per frame value, with
# frames running over range(2017, 2021), i.e. 2017 through 2020.
fig, ax = plt.subplots(figsize=(15, 8))
animator = animation.FuncAnimation(
    fig=fig,
    func=draw_barchart_test,
    frames=range(2017, 2021),
)
# Embed the animation as an interactive JavaScript/HTML widget.
HTML(animator.to_jshtml())
# Alternatives: animator.to_html5_video() or animator.save()